# coding: utf-8
import pandas as pd
import numpy as np
from joblib import load, dump
from src.utils import plot_performance
import matplotlib.pyplot as plt

####################################################################################
####################################################################################
############################# AREA OF INPUT PARAMETERS #############################
####################################################################################
####################################################################################
##### Environment Parameters
# Root directory; should be the directory in which the "README.md" file is located
PATH = "."
# Sub-directories for the data, the saved models and the exported graphs
PATH_DATA = f"{PATH}/data"
PATH_MODELS = f"{PATH}/models"
PATH_PICS = f"{PATH}/pics"

##### Parameters for Simulated Bandits
# Simulation seeds; to reproduce the results, use the seeds 1989 to 1998 (inclusive)
list_seed_simulation = list(range(1989, 1999))
# Number of rounds T; use 50000 to reproduce the results
T = 50000
# Budget constraints; to reproduce the results, use the list [1600, 2200]
budget = [1600, 2200]

####################################################################################
####################################################################################
####################### Collect the Simulated Results ##############################
####################################################################################
####################################################################################
# Values of C whose simulation results are collected
list_C = [0.025, 0.1, 0.3]
# File-name tag of each C: "0" followed by the digits after the decimal point (0.025 -> "0025")
list_C_str = [f"0{str(value_)[2:]}" for value_ in list_C]
# Round indices 1, 2, ..., T, passed to the plotting helper as the x-axis range
T_range = np.arange(1, T + 1)

# Aggregated results, filled below with one entry per budget
dict_results_benchmark, dict_results_regrets = {}, {}
dict_results_regrets_cost1, dict_results_regrets_cost2 = {}, {}

### Collect the results
# For every (budget, C) pair, load the per-seed simulation outputs, turn them into cumulative
# curves, and store their mean / standard deviation across seeds under the plot labels.
# NOTE: joblib.load unpickles the files, so only trusted model files should be placed in PATH_MODELS.
label_linear = "Box D adaptive policy (for linear CBwK)"
label_logistic = "Box C adaptive policy (specific to the conversion model)"


def _mean_std_over_seeds(list_curves_):
    """Return the per-round (mean, std) across seeds of a list of 1-D curves.

    The curves are stacked once, one column per seed, so the stacked array is always 2-D
    (also when there is a single seed) and ``axis=1`` reduces across seeds.
    """
    stacked_ = np.column_stack(list_curves_)
    return stacked_.mean(axis=1), stacked_.std(axis=1)


for budget_ in budget:
    print(budget_)
    path_budget_ = f"{PATH_MODELS}/budget_{budget_}"
    # Per-round share of the budget, subtracted from every observed cost below
    budget_per_round_ = budget_ / T

    dict_results_benchmark_ = {}
    dict_results_regrets_ = {}
    dict_results_regrets_cost1_ = {}
    dict_results_regrets_cost2_ = {}

    # The benchmark files depend only on (budget, seed), so they are loaded once per budget
    # instead of once per value of C.
    list_reward_benchmark_ = [
        load(f"{path_budget_}/random_seed_{seed_}/policy_optim_static.pkl")["reward"].values
        for seed_ in list_seed_simulation
    ]
    mean_, std_ = _mean_std_over_seeds([np.cumsum(reward_) for reward_ in list_reward_benchmark_])
    dict_results_benchmark_["Benchmark"] = mean_
    dict_results_benchmark_["Benchmark - std"] = std_

    for C_ in list_C:
        print(C_)
        C_str_ = "0" + str(C_)[2:]

        # One list of per-seed curves for every (policy label, metric) pair
        curves_ = {
            (label_, metric_): []
            for label_ in (label_linear, label_logistic)
            for metric_ in ("regret", "cost1", "cost2")
        }

        for seed_, reward_benchmark_ in zip(list_seed_simulation, list_reward_benchmark_):
            print(seed_)
            path_seed_ = f"{path_budget_}/random_seed_{seed_}"
            policies_ = {
                label_linear: load(f"{path_seed_}/linear_bandits_C{C_str_}.pkl"),
                label_logistic: load(f"{path_seed_}/logistic_bandits_C{C_str_}.pkl"),
            }

            for label_, policy_ in policies_.items():
                # Cumulative gap between the benchmark reward and the policy reward
                curves_[label_, "regret"].append(np.cumsum(reward_benchmark_ - np.array(policy_.rewards)))
                # Cumulative gap between each observed cost and the per-round budget share
                curves_[label_, "cost1"].append(np.cumsum(np.array(policy_.costs1) - budget_per_round_))
                curves_[label_, "cost2"].append(np.cumsum(np.array(policy_.costs2) - budget_per_round_))

        # Key order per dict is kept as: linear mean, linear std, logistic mean, logistic std
        for metric_, dict_target_ in (
            ("regret", dict_results_regrets_),
            ("cost1", dict_results_regrets_cost1_),
            ("cost2", dict_results_regrets_cost2_),
        ):
            for label_ in (label_linear, label_logistic):
                mean_, std_ = _mean_std_over_seeds(curves_[label_, metric_])
                dict_target_[f"{label_} --- C={C_}"] = mean_
                dict_target_[f"{label_} --- C={C_} - std"] = std_

        print("----------------------------")

    dict_results_benchmark[budget_] = dict_results_benchmark_
    dict_results_regrets[budget_] = dict_results_regrets_
    dict_results_regrets_cost1[budget_] = dict_results_regrets_cost1_
    dict_results_regrets_cost2[budget_] = dict_results_regrets_cost2_


# Flatten the results into the order in which the subplots are filled: one row per metric
# (regrets, regrets_cost1, regrets_cost2) and, within a row, one entry per budget.
# The dictionaries are referenced directly instead of being looked up by name with eval(), and
# the budgets come from the `budget` parameter so that changing it does not raise a KeyError.
list_dict_results = [
    dict_by_budget_[budget_]
    for dict_by_budget_ in (dict_results_regrets, dict_results_regrets_cost1, dict_results_regrets_cost2)
    for budget_ in budget
]

####################################################################################
####################################################################################
####################### Plot and export the results ################################
####################################################################################
####################################################################################

# One subplot per entry of list_dict_results, laid out on a 3 x 2 grid filled row by row
fig, axs = plt.subplots(3, 2, figsize=(32, 36))
axs = axs.flatten()
for dict_results_, ax_ in zip(list_dict_results, axs):
    plot_performance(dict_results_, T_range=T_range, keep_legend=False, ax=ax_, sampling=0.1)

# The shared legends are built from the last subplot that was drawn; only the text after
# the "---" separator of each label is kept.
handles, labels = ax_.get_legend_handles_labels()
labels = [label_.split("---")[1].strip() for label_ in labels]

# NOTE(review): the split below puts every handle from index 3 on under the "Box C" title and
# the first three under the "Box D" title; this relies on the order in which plot_performance
# registers its curves -- confirm against that helper.
legend_specs_ = (
    (slice(3, None), (0.42, 0.94), "Box C adaptive policy (specific to the conversion model)"),
    (slice(None, 3), (0.81, 0.94), "Box D adaptive policy (for linear CBwK)"),
)
for slice_, anchor_, title_ in legend_specs_:
    fig.legend(
        handles[slice_],
        labels[slice_],
        bbox_to_anchor=anchor_,
        prop={'size': 20},
        title=title_,
        title_fontsize=20,
    )

# Export the figure to the graphs directory
fig.savefig(f"{PATH_PICS}/plot_performance_with_legend.pdf", dpi=300, bbox_inches='tight')

